package com.cdk8s.example.simplespringboot.utils;

import cn.hutool.core.util.URLUtil;


import com.cdk8s.example.simplespringboot.utils.pojo.TopHotInfo;
import com.cdk8s.example.simplespringboot.utils.pojo.WebsiteInfo;
import com.cdk8s.example.simplespringboot.utils.tracking.EventTrackingUtil;
import com.fasterxml.jackson.core.type.TypeReference;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Connection;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import javax.net.ssl.*;
import java.io.File;
import java.io.IOException;
import java.net.ConnectException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.UnknownHostException;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.X509Certificate;
import java.text.Collator;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Reference for commonly used Jsoup selectors: https://blog.csdn.net/wangjunjun2008/article/details/50513528
 */
@Slf4j
public final class JsoupUtil {

	// =====================================URL parsing start=====================================

	/**
	 * Downloads the root page of the site that {@code url} belongs to and extracts values from the
	 * elements matching {@code cssQuery}.
	 * <p>
	 * Note that the page requested is the root address derived by {@code URLUtil.getHost}, not the
	 * full {@code url} that was passed in.
	 *
	 * @param url             any URL of the target site
	 * @param cssQuery        Jsoup selector applied to the page body
	 * @param attributeKey    attribute to read from each matched element; when blank the element text is used
	 * @param removeDuplicate whether duplicates are removed from the result
	 * @return the extracted values, or {@code null} when the page could not be fetched or nothing matched
	 */
	public static List<String> getSubUrlList(String url, String cssQuery, String attributeKey, Boolean removeDuplicate) {
		// Normalize first so that repeated slashes and backslashes do not break parsing.
		String normalizedUrl = URLUtil.normalize(url);

		// Parse the root address once and derive every needed part from the same URI.
		java.net.URI rootUri = URLUtil.getHost(URLUtil.toUrlForHttp(normalizedUrl));
		String baseUrl = rootUri.toString();
		String urlHost = rootUri.getHost();

		Document document = getDocumentByUrl(baseUrl, urlHost, 5000);

		return getSubUrlListCore(document, cssQuery, attributeKey, removeDuplicate);
	}

	/**
	 * Extracts values from a locally stored HTML file instead of a live page.
	 *
	 * @param sourceUrl       the site the file came from; it is only handed to Jsoup as the base URI,
	 *                        the content itself is always read from the file
	 * @param fileFullPath    full path of the HTML file, read as UTF-8
	 * @param cssQuery        Jsoup selector applied to the document body
	 * @param attributeKey    attribute to read from each matched element; when blank the element text is used
	 * @param removeDuplicate whether duplicates are removed from the result
	 * @return the extracted values, or {@code null} when nothing matched
	 */
	@SneakyThrows
	public static List<String> getSubUrlListByFile(String sourceUrl, String fileFullPath, String cssQuery, String attributeKey, Boolean removeDuplicate) {
		Document parsedFile = Jsoup.parse(new File(fileFullPath), "UTF-8", sourceUrl);
		return getSubUrlListCore(parsedFile, cssQuery, attributeKey, removeDuplicate);
	}

	/**
	 * Collects basic information (title, favicon, keywords, description) about a website.
	 *
	 * @param url      any URL of the target site; only its root address is used
	 * @param document an already loaded document, e.g. one obtained through selenium
	 *                 ({@code Jsoup.parse(webDriver.getPageSource())}); when {@code null} the root
	 *                 page is downloaded here
	 * @return the collected information, or {@code null} when the URL cannot be parsed, the page
	 * cannot be loaded, or the page has no title
	 */
	public static WebsiteInfo getWebsiteInfo(String url, Document document) {
		// Normalize first so that repeated slashes and backslashes do not break parsing.
		String normalizedUrl = URLUtil.normalize(url);

		// Parse the root address once; every derived part stays inside the same try block so that
		// an unparsable link is ignored as a whole instead of failing half way.
		String baseUrl;
		String urlScheme;
		String urlHost;
		try {
			java.net.URI rootUri = URLUtil.getHost(URLUtil.toUrlForHttp(normalizedUrl));
			baseUrl = rootUri.toString();
			urlScheme = rootUri.getScheme();
			urlHost = rootUri.getHost();
		} catch (Exception ignored) {
			// Some links are malformed and cannot be reduced to a root address; skip them.
			return null;
		}

		Document page = (null != document) ? document : getDocumentByUrl(baseUrl, urlHost, 5000);
		if (null == page) {
			return null;
		}

		String title = getTitle(page);
		Set<String> faviconLinkSet = getFaviconLink(page, baseUrl, urlScheme);
		String keywordsContent = getKeywords(page);
		String descriptionContent = getDescription(page);

		// Strip single quotes so that the values cannot break SQL statements assembled later on.
		title = StringUtil.remove(title, "'");
		keywordsContent = StringUtil.remove(keywordsContent, "'");
		keywordsContent = replaceBlank(keywordsContent);
		descriptionContent = StringUtil.remove(descriptionContent, "'");

		log.info("=================start================");
		log.info("------zch---getWebsiteInfo---baseUrl <{}>", baseUrl);
		log.info("------zch---getWebsiteInfo---title <{}>", title);
		log.info("------zch---getWebsiteInfo---keywordsContent <{}>", keywordsContent);
		log.info("------zch---getWebsiteInfo---descriptionContent <{}>", descriptionContent);
		log.info("=================end================");

		if (StringUtil.isBlank(title)) {
			// A page without a title is treated as one that is rendered purely by JavaScript.
			log.error("------zch---getWebsiteInfo---<{}> 做了反扒处理，里面都是 JS", baseUrl);
			return null;
		}

		WebsiteInfo websiteInfo = new WebsiteInfo();
		websiteInfo.setBaseUrl(baseUrl);
		websiteInfo.setTitle(title);
		if (CollectionUtil.isNotEmpty(faviconLinkSet)) {
			// Only index into the array when there is at least one icon candidate.
			websiteInfo.setIconUrl(CollectionUtil.toArray(faviconLinkSet, String.class)[0]);
		}
		websiteInfo.setIconUrlSet(faviconLinkSet);
		websiteInfo.setKeywords(keywordsContent);
		websiteInfo.setDescription(descriptionContent);
		return websiteInfo;
	}

	// Request headers sent by a real browser (Firefox), kept here for reference:
	// GET / HTTP/2
	// Host: tophub.today
	// User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0
	// Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
	// Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
	// Accept-Encoding: gzip, deflate, br
	// Connection: keep-alive
	// Upgrade-Insecure-Requests: 1

	/**
	 * Scrapes the tophub.today home page and returns the link lists of a fixed set of columns.
	 *
	 * @return the columns sorted by name with the Chinese collator, or {@code null} when the page
	 * or its body could not be loaded
	 */
	public static List<TopHotInfo> getTopHot() {
		String url = "https://tophub.today";
		Document document = getDocumentByUrl(url, "tophub.today", 30000);
		Element body = (null == document) ? null : document.body();
		if (null == body) {
			log.error("------zch------今日热榜无法抓到数据");
			return null;
		}

		// Column name -> id of the DOM node holding that column; a null id means "do not scrape".
		Map<String, String> nodeMap = new HashMap<>();
		nodeMap.put("1-常用栏目", null);
		nodeMap.put("微博热点", "node-1");
		nodeMap.put("知乎热点", "node-6");
		nodeMap.put("哔哩日榜", "node-19");
		nodeMap.put("哔哩周榜", "node-9402");
		nodeMap.put("IT之家", "node-119");
		nodeMap.put("掘金热点", "node-100");
		nodeMap.put("开发者头条", "node-132");
		nodeMap.put("产品经理", "node-213");
		nodeMap.put("产品100", "node-300");
		nodeMap.put("36氪热点", "node-11");
		nodeMap.put("少数派", "node-137");
		nodeMap.put("开眼视频", "node-289");
		nodeMap.put("3DM游戏", "node-3254");
		nodeMap.put("起点中文", "node-5832");
		nodeMap.put("纵横中文", "node-5846");
		nodeMap.put("什么值得买", "node-167");

		// Column name -> address of the column's own page.
		Map<String, String> nodeUrlMap = new HashMap<>();
		nodeUrlMap.put("1-常用栏目", "https://tophub.today/c/news");
		nodeUrlMap.put("微博热点", "https://tophub.today/n/KqndgxeLl9");
		nodeUrlMap.put("知乎热点", "https://tophub.today/n/mproPpoq6O");
		nodeUrlMap.put("哔哩日榜", "https://tophub.today/n/74KvxwokxM");
		nodeUrlMap.put("哔哩周榜", "https://tophub.today/n/b0vmbRXdB1");
		nodeUrlMap.put("IT之家", "https://tophub.today/n/74Kvx59dkx");
		nodeUrlMap.put("掘金热点", "https://tophub.today/n/QaqeEaVe9R");
		nodeUrlMap.put("开发者头条", "https://tophub.today/n/5VaobmGeAj");
		nodeUrlMap.put("产品经理", "https://tophub.today/n/20MdKx4ew1");
		nodeUrlMap.put("产品100", "https://tophub.today/n/QaqeEYxe9R");
		nodeUrlMap.put("36氪热点", "https://tophub.today/n/Q1Vd5Ko85R");
		nodeUrlMap.put("少数派", "https://tophub.today/n/Y2KeDGQdNP");
		nodeUrlMap.put("开眼视频", "https://tophub.today/n/KqndgDmeLl");
		nodeUrlMap.put("3DM游戏", "https://tophub.today/n/YqoXQR0vOD");
		nodeUrlMap.put("起点中文", "https://tophub.today/n/VaobmGneAj");
		nodeUrlMap.put("纵横中文", "https://tophub.today/n/b0vmYyJvB1");
		nodeUrlMap.put("什么值得买", "https://tophub.today/n/K7GdagpoQy");

		List<TopHotInfo> columns = new ArrayList<>();
		for (Map.Entry<String, String> entry : nodeMap.entrySet()) {
			String columnName = entry.getKey();
			String nodeId = entry.getValue();

			TopHotInfo column = new TopHotInfo();
			column.setColumnName(columnName);
			column.setGotoUrl(nodeUrlMap.get(columnName));

			List<TopHotInfo.UrlListBean> links = new ArrayList<>();

			if (StringUtil.isBlank(nodeId)) {
				// Columns without a node id are not scraped; the "common columns" entry is filled
				// from a JSON file on the classpath instead.
				if (columnName.equalsIgnoreCase("1-常用栏目")) {
					String presetJson = FileUtil.readFileToStringByClasspath("/jsonData/topHotColumnList.json");
					List<TopHotInfo> presets = JsonUtil.toList(presetJson, new TypeReference<List<TopHotInfo>>() {
					});

					if (CollectionUtil.isNotEmpty(presets)) {
						for (TopHotInfo preset : presets) {
							TopHotInfo.UrlListBean link = new TopHotInfo.UrlListBean();
							link.setHref(preset.getGotoUrl());
							link.setTitle(preset.getColumnName() + "（" + preset.getSubColumnName() + "）");
							links.add(link);
						}
						column.setUrlList(links);
						columns.add(column);
					}
				}
				continue;
			}

			// Scrape the latest entries of this column from the home page.
			Elements nodeElements = body.select("#" + nodeId);
			if (CollectionUtil.isNotEmpty(nodeElements)) {
				Elements anchors = nodeElements.select(".nano-content > a");
				if (CollectionUtil.isNotEmpty(anchors)) {
					for (Element anchor : anchors) {
						String href = anchor.attr("href");
						Elements titleNodes = anchor.select(".cc-cd-cb-ll > .t");
						if (null == titleNodes) {
							continue;
						}
						TopHotInfo.UrlListBean link = new TopHotInfo.UrlListBean();
						link.setHref(href);
						link.setTitle(titleNodes.text());
						links.add(link);
					}
					column.setUrlList(links);
					columns.add(column);
				}
			}
		}

		// Sort ascending by column name using Chinese collation rules.
		Collator chineseCollator = Collator.getInstance(Locale.CHINA);
		columns.sort(Comparator.comparing(TopHotInfo::getColumnName, chineseCollator));

		return columns;
	}

	/**
	 * Collects, for every column listed on tophub.today, its name, its sub name and the address of
	 * the column's own page.
	 * <p>
	 * Pages 1 to 20 of the listing are visited, with a 5 second pause before every request to
	 * reduce the chance of being blocked.
	 *
	 * @return all columns found; pages that could not be loaded are skipped
	 */
	public static List<TopHotInfo> getTopHotColumnUrl() {
		// One page holds 12 columns and there are 274 columns in total (a bit over twenty pages);
		// only the first 20 pages are read.
		List<TopHotInfo> allColumns = new ArrayList<>();
		for (int pageNo = 1; pageNo <= 20; pageNo++) {
			// Sleep 5 seconds to avoid getting blocked.
			ThreadUtil.sleepBySeconds(5);

			String pageUrl = "https://tophub.today/c/news?p=" + pageNo;
			Document document = getDocumentByUrl(pageUrl, "tophub.today", 10000);
			if (null == document) {
				log.error("------zch------document 为空 <{}>", pageUrl);
				continue;
			}

			List<TopHotInfo> pageColumns = new ArrayList<>();
			Element body = document.body();
			Elements cards = (null == body) ? null : body.select(".cc-cd-ih");
			if (null != cards && CollectionUtil.isNotEmpty(cards)) {
				for (Element card : cards) {
					Elements linkNodes = card.select(".cc-cd-is > a"); // link to the column page
					Elements nameNodes = card.select(".cc-cd-is > a > div"); // title on the left
					Elements subNameNodes = card.select(".cc-cd-sb > .cc-cd-sb-ss > .cc-cd-sb-st"); // sub title on the right

					// A card without a link is ignored entirely.
					if (CollectionUtil.isNotEmpty(linkNodes)) {
						TopHotInfo column = new TopHotInfo();
						// When several nodes match, the last one determines the value.
						column.setGotoUrl("https://tophub.today" + linkNodes.last().attr("href"));

						if (CollectionUtil.isNotEmpty(nameNodes)) {
							column.setColumnName(StringUtil.trim(nameNodes.last().text()));
						}
						if (CollectionUtil.isNotEmpty(subNameNodes)) {
							column.setSubColumnName(StringUtil.trim(subNameNodes.last().text()));
						}

						pageColumns.add(column);
					}
				}
			}

			if (CollectionUtil.isNotEmpty(pageColumns)) {
				allColumns.addAll(pageColumns);
			}
		}

		return allColumns;
	}


	/**
	 * Downloads and parses a page with Jsoup.
	 * <p>
	 * The request carries a random user agent and a Baidu search referer, and uses the SSL socket
	 * factory from {@code jsoupSocketFactory()}. Every failure is recorded through
	 * {@code EventTrackingUtil} and logged; no exception raised by the request itself is propagated.
	 *
	 * @param url     address to download
	 * @param urlHost value of the {@code Host} header, also used in the referer
	 * @param timeout timeout in milliseconds passed to Jsoup
	 * @return the parsed document, or {@code null} when the request failed
	 */
	public static Document getDocumentByUrl(String url, String urlHost, int timeout) {
		Connection request = Jsoup.connect(url)
				.ignoreContentType(true)
				.userAgent(UserAgentUtil.getRandomUserAgent())
				.header("Host", urlHost)
				.header("Referer", "https://www.baidu.com/baidu?wd=" + urlHost)
				.header("Connection", "keep-alive")
				.header("upgrade-insecure-requests", "1")
				.timeout(timeout)
				.sslSocketFactory(jsoupSocketFactory());

		try {
			return request.get();
		} catch (SocketTimeoutException e) {
			EventTrackingUtil.log(url, "GetDocSocketTimeoutException");
			log.error("------zch---getWebsiteInfo---请求 url 超时：<{}> ", url);
			return null;
		} catch (ConnectException e) {
			EventTrackingUtil.log(url, "GetDocConnectException");
			log.error("------zch---getWebsiteInfo---请求 url 连接被拒绝：<{}> ", url);
			return null;
		} catch (UnknownHostException e) {
			EventTrackingUtil.log(url, "GetDocUnknownHostException");
			log.error("------zch---getWebsiteInfo---请求 url 不存在或已失效：<{}> ", url);
			return null;
		} catch (SSLHandshakeException e) {
			EventTrackingUtil.log(url, "GetDocSSLHandshakeException");
			log.error("------zch---getWebsiteInfo---请求 url 远程主机在握手期间关闭连接：<{}> ", url);
			return null;
		} catch (HttpStatusException e) {
			EventTrackingUtil.log(url, "GetDocHttpStatusException");
			log.error("------zch---getWebsiteInfo---请求 url 远程主机返回状态非 200：<{}> <{}> ", url, e.getMessage());
			ExceptionUtil.printStackTraceAsString(e);
			return null;
		} catch (IOException e) {
			EventTrackingUtil.log(url, "GetDocIOException");
			log.error("------zch---getWebsiteInfo---请求 url 远程主机返回 IO 解析异常：<{}> <{}> ", url, e.getMessage());
			return null;
		} catch (Exception e) {
			EventTrackingUtil.log(url, "GetDocException");
			log.error("------zch---getWebsiteInfo---请求 url 报错：<{}> <{}> ", url, e.getMessage());
			ExceptionUtil.printStackTraceAsString(e);
			return null;
		}
	}

	/**
	 * Downloads and parses a page with Jsoup through an HTTP proxy.
	 * <p>
	 * No proxy is configured yet (see the todo below). Passing the unset {@code Integer} port to
	 * {@code Connection.proxy(String, int)} would auto-unbox {@code null} and throw a
	 * {@code NullPointerException} on every call, so as long as host or port is missing the
	 * request falls back to {@link #getDocumentByUrl(String, String, int)}.
	 *
	 * @param url     address to download
	 * @param urlHost value of the {@code Host} header, also used in the referer
	 * @param timeout timeout in milliseconds passed to Jsoup
	 * @return the parsed document, or {@code null} when the request failed
	 */
	public static Document getDocumentByUrlAndProxy(String url, String urlHost, int timeout) {
		// zchtodo: if direct requests stop working altogether, buy dynamic proxy IPs and set them here
		String host = null;
		Integer port = null;
		if (StringUtil.isBlank(host) || null == port) {
			log.warn("------zch---getDocumentByUrlAndProxy---proxy is not configured, falling back to a direct request: <{}> ", url);
			return getDocumentByUrl(url, urlHost, timeout);
		}

		Connection connect = Jsoup.connect(url).proxy(host, port).ignoreContentType(true);
		connect.userAgent(UserAgentUtil.getRandomUserAgent());
		connect.header("Host", urlHost);
		connect.header("Referer", "https://www.baidu.com/baidu?wd=" + urlHost);
		connect.header("Connection", "keep-alive");
		connect.header("upgrade-insecure-requests", "1");
		connect.timeout(timeout);
		connect.sslSocketFactory(jsoupSocketFactory());

		try {
			return connect.get();
		} catch (SocketTimeoutException e) {
			EventTrackingUtil.log(url, "GetDocSocketTimeoutException");
			log.error("------zch---getWebsiteInfo---请求 url 超时：<{}> ", url);
			return null;
		} catch (ConnectException e) {
			EventTrackingUtil.log(url, "GetDocConnectException");
			log.error("------zch---getWebsiteInfo---请求 url 连接被拒绝：<{}> ", url);
			return null;
		} catch (UnknownHostException e) {
			EventTrackingUtil.log(url, "GetDocUnknownHostException");
			log.error("------zch---getWebsiteInfo---请求 url 不存在或已失效：<{}> ", url);
			return null;
		} catch (SSLHandshakeException e) {
			EventTrackingUtil.log(url, "GetDocSSLHandshakeException");
			log.error("------zch---getWebsiteInfo---请求 url 远程主机在握手期间关闭连接：<{}> ", url);
			return null;
		} catch (HttpStatusException e) {
			EventTrackingUtil.log(url, "GetDocHttpStatusException");
			log.error("------zch---getWebsiteInfo---请求 url 远程主机返回状态非 200：<{}> <{}> ", url, e.getMessage());
			ExceptionUtil.printStackTraceAsString(e);
			return null;
		} catch (IOException e) {
			EventTrackingUtil.log(url, "GetDocIOException");
			log.error("------zch---getWebsiteInfo---请求 url 远程主机返回 IO 解析异常：<{}> <{}> ", url, e.getMessage());
			return null;
		} catch (Exception e) {
			EventTrackingUtil.log(url, "GetDocException");
			log.error("------zch---getWebsiteInfo---请求 url 报错：<{}> <{}> ", url, e.getMessage());
			ExceptionUtil.printStackTraceAsString(e);
			return null;
		}
	}


	// =====================================URL parsing end=====================================


	// =====================================private methods start=====================================

	/**
	 * Extracts one value per element matching {@code cssQuery} in the document body.
	 * <p>
	 * When {@code attributeKey} is blank the element text is collected as is. Otherwise the
	 * attribute is read, whitespace is removed and the value is reduced to its root address;
	 * values that are blank, contain a semicolon or cannot be parsed as a URL are skipped.
	 *
	 * @param document        parsed page, may be {@code null}
	 * @param cssQuery        Jsoup selector applied to the document body
	 * @param attributeKey    attribute to read; when blank the element text is used
	 * @param removeDuplicate whether duplicates are removed; {@code null} is treated as {@code false}
	 * @return the extracted values, or {@code null} when there is no document, no body or no match
	 */
	@SneakyThrows
	private static List<String> getSubUrlListCore(Document document, String cssQuery, String attributeKey, Boolean removeDuplicate) {
		if (null == document || null == document.body()) {
			return null;
		}

		Elements elements = document.body().select(cssQuery);
		if (CollectionUtil.isEmpty(elements)) {
			return null;
		}

		// The attribute key does not change while iterating, so decide the mode once.
		boolean readText = StringUtil.isBlank(attributeKey);

		List<String> subUrlList = new ArrayList<>();
		for (Element element : elements) {
			if (readText) {
				// Not reading an attribute means reading the text content directly.
				subUrlList.add(element.text());
				continue;
			}

			String href = element.attr(attributeKey);
			if (StringUtil.isBlank(href)) {
				continue;
			}
			if (StringUtil.containsIgnoreCase(href, ";")) {
				// Some special URLs contain a semicolon and cannot be parsed; filter them out.
				continue;
			}
			href = replaceBlank(href);
			try {
				href = URLUtil.getHost(URLUtil.toUrlForHttp(href)).toString();
			} catch (Exception ignored) {
				// Some links are malformed and cannot be reduced to a root address; skip them.
				continue;
			}
			subUrlList.add(href);
		}

		// Boolean.TRUE.equals avoids a NullPointerException from unboxing a null flag.
		if (Boolean.TRUE.equals(removeDuplicate)) {
			return CollectionUtil.removeDuplicate(subUrlList);
		}
		return subUrlList;
	}

	/**
	 * Matches whitespace to be stripped; compiled once instead of on every call.
	 */
	private static final Pattern BLANK_PATTERN = Pattern.compile("\\s*|\\t|\\r|\\n");

	/**
	 * Removes every whitespace character (spaces, tabs, line breaks) from a string.
	 *
	 * @param str text to clean, may be {@code null}
	 * @return the text without whitespace, or {@code null} when {@code str} is {@code null}
	 */
	private static String replaceBlank(String str) {
		if (str == null) {
			return null;
		}
		return BLANK_PATTERN.matcher(str).replaceAll("");
	}


	/**
	 * Reads the page title, falling back to {@code <meta name="title">} when the title is blank.
	 *
	 * @param document parsed page
	 * @return the title, possibly empty when neither source provides one
	 */
	private static String getTitle(Document document) {
		String pageTitle = document.title();
		if (!StringUtil.isBlank(pageTitle)) {
			return pageTitle;
		}

		// Some sites put the title into a meta tag instead.
		Elements metaTitle = document.head().select("meta[name=title]");
		return metaTitle.attr("content");
	}

	/**
	 * Reads the content of {@code <meta name="keywords">} from the page head.
	 *
	 * @param document parsed page
	 * @return the {@code content} attribute value, or {@code null} when the selection itself is {@code null}
	 */
	private static String getKeywords(Document document) {
		Elements keywordMeta = document.head().select("meta[name=keywords]");
		return (null == keywordMeta) ? null : keywordMeta.attr("content");
	}

	/**
	 * Reads the content of {@code <meta name="description">} from the page head.
	 *
	 * @param document parsed page
	 * @return the {@code content} attribute value, or {@code null} when the selection itself is {@code null}
	 */
	private static String getDescription(Document document) {
		Elements descriptionMeta = document.head().select("meta[name=description]");
		return (null == descriptionMeta) ? null : descriptionMeta.attr("content");
	}

	/**
	 * Finds the favicon candidates of a page and turns them into clean absolute URLs.
	 * <p>
	 * Relative links are completed with {@code rootUrl} or {@code urlScheme}, base64 data links are
	 * replaced by {@code rootUrl + "/favicon.ico"}, and query strings as well as trailing
	 * "/..." or "!..." suffixes after the file name are dropped. An explicit port in the link is
	 * kept, because dropping it would point the link at a different server.
	 *
	 * @param document  parsed page
	 * @param rootUrl   root address of the site, without trailing slash
	 * @param urlScheme scheme of the site, used for protocol-relative links
	 * @return the cleaned links; empty when no candidate was found
	 */
	private static Set<String> getFaviconLink(Document document, String rootUrl, String urlScheme) {
		Set<String> rawLinks = getFaviconLinkByHead(document, rootUrl);
		Set<String> cleanedLinks = new HashSet<>();

		if (CollectionUtil.isEmpty(rawLinks)) {
			return cleanedLinks;
		}

		// File name endings that some sites follow with extra path segments, e.g.
		// https://static.wikia.nocookie.net/logopedia/images/4/4a/Site-favicon.ico/revision/latest?cb=20210629045729
		String[] imageSuffixes = {".ico", ".png", ".jpg"};

		for (String rawLink : rawLinks) {
			String faviconLink = rawLink;

			// Complete relative links. Anything starting with "https" also starts with "http",
			// so one prefix check is enough.
			if (!StringUtil.startsWith(faviconLink, "http")) {
				if (StringUtil.startsWith(faviconLink, "//")) {
					// Protocol-relative link: only the scheme is missing.
					faviconLink = urlScheme + ":" + faviconLink;
				} else if (StringUtil.startsWith(faviconLink, "/")) {
					faviconLink = rootUrl + faviconLink;
				} else if (StringUtil.startsWith(faviconLink, "./")) {
					faviconLink = rootUrl + StringUtil.removeStart(faviconLink, ".");
				} else {
					faviconLink = rootUrl + "/" + faviconLink;
				}
			}

			// Inline base64 icons cannot be used as a link; assume the default location instead.
			if (StringUtil.containsIgnoreCase(faviconLink, ";base64,")) {
				faviconLink = rootUrl + "/favicon.ico";
			}

			// Drop query string and fragment, but keep an explicit port.
			URL urlForHttp = URLUtil.toUrlForHttp(faviconLink);
			int port = urlForHttp.getPort();
			String hostAndPort = (port == -1) ? urlForHttp.getHost() : urlForHttp.getHost() + ":" + port;
			faviconLink = urlForHttp.getProtocol() + "://" + hostAndPort + urlForHttp.getPath();

			for (String suffix : imageSuffixes) {
				String marker = suffix + "/";
				if (StringUtil.containsIgnoreCase(faviconLink, marker)) {
					faviconLink = StringUtil.substringBefore(faviconLink, marker) + suffix;
				}
			}
			if (StringUtil.containsIgnoreCase(faviconLink, "!")) {
				// Some links carry a "!" parameter, e.g. http://www.YouMeek.com/aa.png!large
				faviconLink = StringUtil.substringBefore(faviconLink, "!");
			}

			cleanedLinks.add(faviconLink);
		}

		return cleanedLinks;
	}

	/**
	 * Looks for favicon links in the page head, from the most to the least reliable source.
	 * <p>
	 * Order: {@code rel="shortcut icon"} and {@code rel="icon"}; then {@code rel="alternate icon"},
	 * {@code rel="apple-touch-icon"} and any non-stylesheet link whose href matches ".ico"; then the
	 * first link ending in png/jpg/jpeg; finally {@code rootUrl + "/favicon.ico"} as a guess. The
	 * search stops after the first two stages as soon as a stage produced a result.
	 *
	 * @param document parsed page
	 * @param rootUrl  root address of the site, without trailing slash
	 * @return the raw href values as found in the page (they may be relative); never empty
	 */
	private static Set<String> getFaviconLinkByHead(Document document, String rootUrl) {
		Element head = document.head();
		Set<String> found = new HashSet<>();

		// Stage 1: the precise declarations.
		addFirstHref(head, "link[rel=shortcut icon]", found);
		addFirstHref(head, "link[rel=icon]", found);
		if (CollectionUtil.isNotEmpty(found)) {
			// These matches are precise, so use them directly once there is a value.
			return found;
		}

		// Stage 2: less common declarations plus anything that looks like an .ico file.
		addFirstHref(head, "link[rel=alternate icon]", found);
		addFirstHref(head, "link[rel=apple-touch-icon]", found);

		Elements icoLinks = head.select("link[href~=.*\\.(ico)]");
		if (CollectionUtil.isNotEmpty(icoLinks)) {
			for (Element icoLink : icoLinks) {
				if (null == icoLink) {
					continue;
				}
				String rel = icoLink.attr("rel");
				if (StringUtil.isNotBlank(rel) && StringUtil.containsIgnoreCase(rel, "stylesheet")) {
					// CSS files with "ico"/"icon" in their name match as well, for example:
					// <link rel="stylesheet" href="css/themes/jquery.mobile.icons.min.css" />
					continue;
				}
				String href = icoLink.attr("href");
				if (StringUtil.isNotBlank(href)) {
					found.add(href);
				}
			}
		}
		if (CollectionUtil.isNotEmpty(found)) {
			// Still fairly precise, so use these directly once there is a value.
			return found;
		}

		// Stage 3: imprecise, only consulted when nothing else was found.
		Element imageLink = head.select("link[href~=.*\\.(png|jpg|jpeg)]").first();
		if (null != imageLink) {
			String href = imageLink.attr("href");
			boolean endsWithImageSuffix = StringUtil.isNotBlank(href)
					&& (StringUtil.endsWithIgnoreCase(href, ".png") || StringUtil.endsWithIgnoreCase(href, ".jpg") || StringUtil.endsWithIgnoreCase(href, ".jpeg"));
			if (endsWithImageSuffix) {
				// The selector also matches addresses that merely contain the suffix, e.g.
				// https://hi.pngtree.com/, hence the additional ending check.
				found.add(href);
			}
		}

		// Nothing found at all: assume the icon sits at the site root.
		if (CollectionUtil.isEmpty(found)) {
			found.add(rootUrl + "/favicon.ico");
		}
		return found;
	}

	/**
	 * Adds the href of the elements matching {@code cssQuery} to {@code target} when it is not blank.
	 */
	private static void addFirstHref(Element head, String cssQuery, Set<String> target) {
		Elements matched = head.select(cssQuery);
		if (null == matched) {
			return;
		}
		String href = matched.attr("href");
		if (StringUtil.isNotBlank(href)) {
			target.add(href);
		}
	}


	/**
	 * Builds an SSL socket factory that accepts every certificate, so that sites with self-signed
	 * certificates can be scraped.
	 * <p>
	 * SECURITY NOTE: the trust manager below performs no validation at all, which leaves
	 * connections open to man-in-the-middle attacks. It must only be used for scraping public
	 * pages, never for requests that carry credentials or other sensitive data.
	 *
	 * @return a socket factory backed by a trust-all TLS context
	 * @throws RuntimeException when the TLS context cannot be created or initialised
	 */
	private static SSLSocketFactory jsoupSocketFactory() {
		TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() {
			@Override
			public X509Certificate[] getAcceptedIssuers() {
				// The X509TrustManager contract requires a non-null (possibly empty) array.
				return new X509Certificate[0];
			}

			@Override
			public void checkClientTrusted(X509Certificate[] certs, String authType) {
				// Intentionally empty: every client certificate is accepted.
			}

			@Override
			public void checkServerTrusted(X509Certificate[] certs, String authType) {
				// Intentionally empty: every server certificate is accepted.
			}
		}};

		try {
			SSLContext sslContext = SSLContext.getInstance("TLS");
			sslContext.init(null, trustAllCerts, new java.security.SecureRandom());
			return sslContext.getSocketFactory();
		} catch (NoSuchAlgorithmException | KeyManagementException e) {
			log.error("------zch------Failed to create a SSL socket factory");
			ExceptionUtil.printStackTraceAsString(e);
			throw new RuntimeException("Failed to create a SSL socket factory", e);
		}
	}

	// =====================================private methods end=====================================

}



